The Data

The data used in this notebook is from the COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University. The package used to retrieve data information can be found here.
Accessed dataset on: 2020-08-11

Preview of Dataset

# Load the `coronavirus` data frame into the session and show its first rows.
data("coronavirus")
head(coronavirus)
# Relabel "US" as "United States" so it matches the renamed population table
# that is joined on country name further down.
coronavirus <- coronavirus %>%
  mutate(country = replace(country, country == "US", "United States"))

# Fill empty province with NA
coronavirus$province[coronavirus$province == ""] <- NA
# Population data
library(wpp2019)
data(pop)

# Keep only the name column and the column labelled "2020", then rename the
# latter to `population`.
keeps <- c("name","2020")
pop_2020 = pop[keeps]
names(pop_2020)[2] <- "population"
# Shorten selected long-form names so they can match the `country` values used
# as the join key below.
pop_2020 <- pop_2020 %>%
  mutate(name = replace(name, name == "United States of America", "United States")) %>%
  mutate(name = replace(name, name == "Iran (Islamic Republic of)", "Iran")) %>%
  mutate(name = replace(name, name == "Russian Federation", "Russia")) %>%
  mutate(name = replace(name, name == "Bolivia (Plurinational State of)", "Bolivia")) %>%
  mutate(name = replace(name, name == "Republic of Moldova", "Moldova")) %>%
  mutate(name = replace(name, name == "Venezuela (Bolivarian Republic of)", "Venezuela"))
# NOTE(review): presumably `pop` is reported in thousands of people, hence the
# scaling to head counts -- confirm against the wpp2019 documentation.
pop_2020$population <- pop_2020$population*1000

# add population of each country
cases_pc_df <- left_join(coronavirus, pop_2020, by = c("country" = "name"))

Exploration of Cases Throughout The World

Countries With Highest Cases

Confirmed Cases by Country

Death Cases by Country

Recovered Cases by Country

Timeline

World Confirmed Cases Timeline

Timeline of Highest Confirmed Cases by Country

Worldmap Visualization

---
title: "COVID-19 Visualizations"
output:
  html_notebook:
    toc: yes
    code_folding: hide
---
<div style="margin-bottom:100px;"></div>

# The Data
The data used in this notebook is from the [COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University](https://github.com/CSSEGISandData/COVID-19). The package used to retrieve the data can be found [here](https://github.com/RamiKrispin/coronavirus). <br>
**Accessed dataset on:** 2020-08-11

```{r echo=FALSE, warning=FALSE, results='hide'}
library(tidyverse)
library(plotly)
library(scales)
library(coronavirus)
#update_dataset()
```

<div style="margin-bottom:50px;"></div>
#### Preview of Dataset
```{r}
# Bring the `coronavirus` data frame into the session by name
data(list = "coronavirus")
# Print the leading rows as a quick look at the columns
head(x = coronavirus)
```

```{r}
# Relabel "US" rows with the long-form country name
us_rows <- which(coronavirus$country == "US")
coronavirus$country[us_rows] <- "United States"

# Mark blank province entries as missing
blank_province_rows <- which(coronavirus$province == "")
coronavirus$province[blank_province_rows] <- NA
```

```{r}
# Population data ----
# Builds `pop_2020` (country name + 2020 population) from the `pop` table and
# attaches it to every row of `coronavirus` as `cases_pc_df`.
library(wpp2019)
data(pop)

# Population-table spelling -> spelling used in `coronavirus$country`.
country_renames <- c(
  "United States of America" = "United States",
  "Iran (Islamic Republic of)" = "Iran",
  "Russian Federation" = "Russia",
  "Bolivia (Plurinational State of)" = "Bolivia",
  "Republic of Moldova" = "Moldova",
  "Venezuela (Bolivarian Republic of)" = "Venezuela"
)

keeps <- c("name", "2020")
pop_2020 <- pop[keeps]
names(pop_2020)[2] <- "population"

# Work on plain strings: assigning a value that is not an existing level into a
# factor silently produces NA, which would drop the renamed countries from the
# join below.
pop_2020$name <- as.character(pop_2020$name)
needs_rename <- pop_2020$name %in% names(country_renames)
pop_2020$name[needs_rename] <-
  unname(country_renames[pop_2020$name[needs_rename]])

# NOTE(review): presumably `pop` is reported in thousands of people, hence the
# scaling to head counts -- confirm against the wpp2019 documentation.
pop_2020$population <- pop_2020$population * 1000

# Add the population of each country; countries without a matching name keep
# their rows and get NA in `population`.
cases_pc_df <- left_join(coronavirus, pop_2020, by = c("country" = "name"))

```

# Exploration of Cases Throughout The World

## Countries With Highest Cases

### Confirmed Cases by Country

```{r echo=FALSE, warning=FALSE, results='hide'}
# Total confirmed cases per location: one row for each distinct combination of
# country, province and coordinates, obtained by summing `cases` over all
# dates. `population` is part of the grouping key only so that it survives the
# summarise step. Rows are ordered from the largest total to the smallest.
world_confirmed_cases_df <- cases_pc_df %>%
  filter(type == "confirmed") %>%
  group_by(country, long, lat, population, province) %>%
  summarise(total_cases = sum(cases)) %>%
  # summarise() only peels off the last grouping variable; drop the remaining
  # ones so later steps work on a plain, ungrouped tibble.
  ungroup() %>%
  arrange(desc(total_cases))
```

```{r echo=FALSE, warning=FALSE}
# Rank countries rather than individual locations: `world_confirmed_cases_df`
# has one row per country/province/coordinate, so province-level rows are
# first summed into a single total per country.
top_10_confirmed_df <- world_confirmed_cases_df %>%
  group_by(country) %>%
  summarise(total_cases = sum(total_cases)) %>%
  arrange(desc(total_cases)) %>%
  head(10)  # unlike [1:10, ], never pads with NA rows when fewer than 10 exist

# Bar chart of the ten largest totals, bars ordered by total and shaded by it.
world_confirmed_graph <-
  ggplot(data = top_10_confirmed_df,
         aes(x = reorder(country, total_cases),
             y = total_cases)) +
  labs(x = "Country",
       y = "Total Confirmed Cases",
       title = "Top 10 Countries With Highest Total Confirmed Cases") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  geom_col(aes(fill = total_cases)) +
  scale_fill_gradient2(low = "thistle1",
                       high = "mediumorchid1",
                       midpoint = median(top_10_confirmed_df$total_cases)) +
  # NOTE(review): the two translucent white overlays below are only 40 and 20
  # units tall, so they are barely visible at this scale -- confirm intent.
  geom_col(
    aes(y = 40),
    fill = "white",
    width = 1,
    alpha = 0.2,
    size = 0
  ) +
  geom_col(
    aes(y = 20),
    fill = "white",
    width = 1,
    alpha = 0.2,
    size = 0
  )

world_confirmed_graph
```

### Death Cases by Country

```{r echo=FALSE, warning=FALSE, results='hide'}
# Total deaths per location: one row for each distinct combination of country,
# province and coordinates, obtained by summing `cases` over all dates and
# ordered from the largest total to the smallest.
world_death_cases_df <- coronavirus %>%
  filter(type == "death") %>%
  group_by(country, long, lat, province) %>%
  summarise(total_deaths = sum(cases)) %>%
  # summarise() only peels off the last grouping variable; drop the rest.
  ungroup() %>%
  arrange(desc(total_deaths))

# Rank countries rather than individual locations: province-level rows are
# summed into one total per country before the ten largest are taken.
top_10_death_df <- world_death_cases_df %>%
  group_by(country) %>%
  summarise(total_deaths = sum(total_deaths)) %>%
  arrange(desc(total_deaths)) %>%
  head(10)  # unlike [1:10, ], never pads with NA rows when fewer than 10 exist

# Bar chart of the ten largest totals, bars ordered by total and shaded by it.
world_death_graph <-
  ggplot(data = top_10_death_df,
         aes(x = reorder(country, total_deaths),
             y = total_deaths)) +
  geom_col(aes(fill = total_deaths)) +
  labs(x = "Country",
       y = "Total Death Cases",
       title = "Top 10 Countries With Highest Death Cases") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_fill_gradient2(low = "red",
                       high = "red4",
                       midpoint = median(top_10_death_df$total_deaths)) +
  # NOTE(review): the two translucent white overlays below are only 40 and 20
  # units tall, so they are barely visible at this scale -- confirm intent.
  geom_col(
    aes(y = 40),
    fill = "white",
    width = 1,
    alpha = 0.2,
    size = 0
  ) +
  geom_col(
    aes(y = 20),
    fill = "white",
    width = 1,
    alpha = 0.2,
    size = 0
  )

world_death_graph
```

### Recovered Cases by Country

```{r echo=FALSE, warning=FALSE, results='hide'}
# Total recoveries per location: one row for each distinct combination of
# country, province and coordinates, obtained by summing `cases` over all
# dates and ordered from the largest total to the smallest.
world_recovered_cases_df <- coronavirus %>%
  filter(type == "recovered") %>%
  group_by(country, long, lat, province) %>%
  summarise(total_recovered = sum(cases)) %>%
  # summarise() only peels off the last grouping variable; drop the rest.
  ungroup() %>%
  arrange(desc(total_recovered))

# Rank countries rather than individual locations: province-level rows are
# summed into one total per country before the ten largest are taken.
top_10_recovered_df <- world_recovered_cases_df %>%
  group_by(country) %>%
  summarise(total_recovered = sum(total_recovered)) %>%
  arrange(desc(total_recovered)) %>%
  head(10)  # unlike [1:10, ], never pads with NA rows when fewer than 10 exist

# Bar chart of the ten largest totals, bars ordered by total and shaded by it.
world_recover_graph <-
  ggplot(data = top_10_recovered_df,
         aes(x = reorder(country, total_recovered),
             y = total_recovered)) +
  geom_col(aes(fill = total_recovered)) +
  labs(x = "Country",
       y = "Total Recovered Cases",
       title = "Top 10 Countries With Highest Total Recovered Cases") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_fill_gradient2(low = "green",
                       high = "green4",
                       midpoint = median(top_10_recovered_df$total_recovered)) +
  # NOTE(review): the two translucent white overlays below are only 40 and 20
  # units tall, so they are barely visible at this scale -- confirm intent.
  geom_col(
    aes(y = 40),
    fill = "white",
    width = 1,
    alpha = 0.2,
    size = 0
  ) +
  geom_col(
    aes(y = 20),
    fill = "white",
    width = 1,
    alpha = 0.2,
    size = 0
  )

world_recover_graph
```

## Timeline

### World Confirmed Cases Timeline
```{r echo=FALSE, warning=FALSE, results='hide'}
# Worldwide confirmed cases per date, in chronological order.
# `total_cases` is the sum of `cases` reported for that date across all
# locations; `cumulative_cases` is the running total up to and including that
# date, which is what the chart title promises.
world_cases_by_date_df <- coronavirus %>%
  filter(type == "confirmed") %>%
  group_by(date) %>%
  summarise(total_cases = sum(cases)) %>%
  # cumsum() depends on row order, so sort by date first
  arrange(date) %>%
  mutate(cumulative_cases = cumsum(total_cases))

ggplot(world_cases_by_date_df, aes(date, cumulative_cases)) +
  geom_line() +
  labs(x = "Month",
       y = "Cases",
       title = "Cumulative Confirmed Cases Worldwide") +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_x_date(labels = date_format("%b"), date_breaks = "1 month")
```

### Timeline of Highest Confirmed Cases by Country
```{r echo=FALSE, warning=FALSE, results='hide'}
# Countries whose curves are drawn in the chart
country_list <- c(
  "United States", "Brazil", "India", "Russia", "Mexico", "China", "Canada"
)

# Confirmed cases per date for each selected country; `population` rides along
# as a grouping key so it is still present after summarising
top_country_df <- cases_pc_df %>%
  filter(country %in% country_list, type == "confirmed") %>%
  group_by(date, country, population) %>%
  summarise(total_cases = sum(cases)) %>%
  arrange(desc(total_cases))

# Express the counts per million inhabitants
total_cases <- top_country_df[["total_cases"]]
population <- top_country_df[["population"]]
top_country_df[["Total Cases"]] <- total_cases * 1000000 / population

# One line per country: per-million cases against date, coloured by country
g <- ggplot(
  data = top_country_df,
  mapping = aes(
    x = date,
    y = `Total Cases`,
    color = country,
    group = country
  )
) +
  geom_line() +
  scale_x_date(labels = date_format("%b"), date_breaks = "1 month") +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    title = "Daily Confirmed Cases by Country (2020)",
    x = "Month",
    y = " Daily Confirmed Cases per Million People"
  )

# Interactive version of the chart with a fixed size and a custom tooltip
ggly <- ggplotly(
  p = g,
  width = 1000,
  height = 700,
  tooltip = c("date", "Total Cases", "group")
)

ggly
```



# Worldmap Visualization
```{r echo=FALSE, warning=FALSE, results='hide'}
# Load packages and world map data
library(sf)
library(tmap)
library(spData)
library(viridis)
library(rnaturalearth)

# Outline data for the "world" map, used as the background layer of the maps
world <- map_data(map = "world")
```

```{r echo=FALSE, warning=FALSE, results='hide'}
# Legend breaks shared by the size and colour scales. Each label names the bin
# that starts at the corresponding break, so the two vectors must stay aligned.
breaks <- c(1, 30, 100, 1000, 50000, 100000)
labels <- c("1-29", "30-99", "100-999",
            "1,000-49,999", "50,000-99,999", "100,000+")

# Bubble map of confirmed cases: grey world outline with one point per
# location, sized and coloured by its total on a log scale.
confirm_map <- ggplot() +
  geom_polygon(data = world,
               aes(x = long, y = lat, group = group),
               fill = "grey", alpha = 0.3) +
  # Both scales are logarithmic, which is undefined for totals <= 0, so those
  # locations are left off the map explicitly.
  geom_point(data = filter(world_confirmed_cases_df, total_cases > 0),
             aes(x = long,
                 y = lat,
                 size = total_cases,
                 color = total_cases,
                 # text_* are not ggplot aesthetics; they are only referenced
                 # by name in the ggplotly() tooltip below
                 text_country = country,
                 text_province = province,
                 text = paste("Confirmed: ", total_cases)),
             alpha = 0.5) +
  scale_size_continuous(name = "Confirmed cases", trans = "log",
                        range = c(1, 8),
                        breaks = breaks, labels = labels) +
  scale_colour_viridis_c(option = "plasma",
                         direction = -1,
                         name = "Confirmed cases",
                         trans = "log",
                         breaks = breaks,
                         labels = labels) +
  guides(colour = guide_legend()) +
  theme_void() +
  labs(title = "Map of Confirmed Cases") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5))

# Interactive version with country, province and total in the tooltip
confirm_map_plotly <- ggplotly(p = confirm_map,
                               width = 1000,
                               height = 700,
                               tooltip = c("text_country",
                                           "text_province",
                                           "text"))

confirm_map_plotly
```


```{r echo=FALSE, warning=FALSE, results='hide'}
# Legend breaks shared by the size and colour scales. Each label names the bin
# that starts at the corresponding break, so the two vectors must stay aligned.
breaks <- c(1, 30, 100, 1000, 50000, 100000)
labels <- c("1-29", "30-99", "100-999",
            "1,000-49,999", "50,000-99,999", "100,000+")

# Bubble map of deaths: grey world outline with one point per location, sized
# and coloured by its total on a log scale.
death_map <- ggplot() +
  geom_polygon(data = world,
               aes(x = long, y = lat, group = group),
               fill = "grey", alpha = 0.3) +
  # Both scales are logarithmic, which is undefined for totals <= 0, so those
  # locations are left off the map explicitly.
  geom_point(data = filter(world_death_cases_df, total_deaths > 0),
             aes(x = long,
                 y = lat,
                 size = total_deaths,
                 color = total_deaths,
                 # text_* are not ggplot aesthetics; they are only referenced
                 # by name in the ggplotly() tooltip below
                 text_country = country,
                 text_province = province,
                 text = paste("Deaths: ", total_deaths)),
             alpha = 0.5) +
  scale_size_continuous(name = "Death cases", trans = "log",
                        range = c(1, 8),
                        breaks = breaks, labels = labels) +
  scale_colour_viridis_c(option = "inferno",
                         direction = -1,
                         name = "Death cases",
                         trans = "log",
                         breaks = breaks,
                         labels = labels) +
  guides(colour = guide_legend()) +
  theme_void() +
  labs(title = "Map of Death Cases") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5))

# Interactive version with country, province and total in the tooltip
death_map_plotly <- ggplotly(p = death_map,
                             width = 1000,
                             height = 700,
                             tooltip = c("text_country",
                                         "text_province",
                                         "text"))

death_map_plotly

```

```{r echo=FALSE, warning=FALSE, results='hide'}
# Legend breaks shared by the size and colour scales. Each label names the bin
# that starts at the corresponding break, so the two vectors must stay aligned.
# The first break is 1, not 0, because 0 has no position on a log scale.
breaks <- c(1, 30, 100, 1000, 50000, 100000)
labels <- c("1-29", "30-99", "100-999",
            "1,000-49,999", "50,000-99,999", "100,000+")

# Bubble map of recoveries: grey world outline with one point per location,
# sized and coloured by its total on a log scale.
recover_map <- ggplot() +
  geom_polygon(data = world,
               aes(x = long, y = lat, group = group),
               fill = "grey", alpha = 0.3) +
  # Both scales are logarithmic, which is undefined for totals <= 0, so those
  # locations are left off the map explicitly.
  geom_point(data = filter(world_recovered_cases_df, total_recovered > 0),
             aes(x = long,
                 y = lat,
                 size = total_recovered,
                 color = total_recovered,
                 # text_* are not ggplot aesthetics; they are only referenced
                 # by name in the ggplotly() tooltip below
                 text_country = country,
                 text_province = province,
                 text = paste("Recovered: ", total_recovered)),
             alpha = 0.5) +
  scale_size_continuous(name = "Recovered cases", trans = "log",
                        range = c(1, 8),
                        breaks = breaks, labels = labels) +
  scale_colour_viridis_c(option = "viridis",
                         direction = -1,
                         name = "Recovered cases",
                         trans = "log",
                         breaks = breaks,
                         labels = labels) +
  guides(colour = guide_legend()) +
  theme_void() +
  labs(title = "Map of Recovered Cases") +
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5))

# Interactive version with country, province and total in the tooltip
recover_map_plotly <- ggplotly(p = recover_map,
                               width = 1000,
                               height = 700,
                               tooltip = c("text_country",
                                           "text_province",
                                           "text"))

recover_map_plotly
```


